##########################################################################################################################################
#################################### Industry Concentration and Risk -- Tests ############################################################

########################### Concentration Ratios ###########################################################################
#Three digit NAICS Code:
load("HHI_3.RData")

#Four digit NAICS Code:
load("HHI_4.RData")

########################### Beta Measures ##################################################################################
#### Monthly
load("Beta_CF.RData")
load("Beta_DR.RData")
load("Beta_M.RData")
load("Beta_Down.RData")
load("Beta_Up.RData")
#Weighted
load("Weight_Beta_CF.RData")
load("Weight_Beta_DR.RData")
load("Weight_Beta_M.RData")
load("Weight_Beta_Down.RData")
load("Weight_Beta_Up.RData")
#Shrinkage
load("Shrinking_Beta_CF.RData")
load("Shrinking_Beta_DR.RData")
load("Shrinking_Beta_M.RData")
load("Shrinking_Beta_Down.RData")
load("Shrinking_Beta_Up.RData")
#Weighting and Shrinkage
load("Weighting_Shrinking_Beta_CF.RData")
load("Weighting_Shrinking_Beta_DR.RData")
load("Weighting_Shrinking_Beta_M.RData")
load("Weighting_Shrinking_Beta_Down.RData")
load("Weighting_Shrinking_Beta_Up.RData")


load("I_Vol.RData")
#Beta_X[1,], shows the PERMNO for each company, HHI_3 has no PERMNO, so we need a matching of PERMNO to NAICS!!
######
# this is the Link Table from the CRSP-Compustat Merged database
# it contains the elements: gvkey,LPERMNO,LINKDT,LINKENDDT
Link_Table = read.table("LinkTable.txt", header=T, sep=",")
#####

#######
# this is a Compustat file containing the following elements:
# gvkey,datadate,fyear,indfmt,consol,popsrc,datafmt,tic,cusip,conm,curcd,sale,exchg,cik,costat,naicsh,naics,sic
Compustat = read.csv("0a4a9373fde91d3a.csv", header=T)

#The NAICS Code changes over time, so we need to match this as well with a matrix that is as large as the CRSP Beta (...)
load("PERMNO_NAICS3.RData")

#Convert data.frames into a .txt and convert the dates:

Beta_M[,1] = as.numeric(format(Beta_M[,1],"%Y%m%d"))
Beta_M = as.matrix(Beta_M)
Beta_DR[,1] = as.numeric(format(Beta_DR[,1],"%Y%m%d"))
Beta_DR = as.matrix(Beta_DR)
Beta_CF[,1] = as.numeric(format(Beta_CF[,1],"%Y%m%d"))
Beta_CF = as.matrix(Beta_CF)
Beta_Up[,1] = as.numeric(format(Beta_Up[,1],"%Y%m%d"))
Beta_Up = as.matrix(Beta_Up)
Beta_Down[,1] = as.numeric(format(Beta_Down[,1],"%Y%m%d"))
Beta_Down = as.matrix(Beta_Down)

Weighting_Beta_M[,1] = as.numeric(format(Weighting_Beta_M[,1],"%Y%m%d"))
Weighting_Beta_M = as.matrix(Weighting_Beta_M)
Weighting_Beta_DR[,1] = as.numeric(format(Weighting_Beta_DR[,1],"%Y%m%d"))
Weighting_Beta_DR = as.matrix(Weighting_Beta_DR)
Weighting_Beta_CF[,1] = as.numeric(format(Weighting_Beta_CF[,1],"%Y%m%d"))
Weighting_Beta_CF = as.matrix(Weighting_Beta_CF)
Weighting_Beta_Up[,1] = as.numeric(format(Weighting_Beta_Up[,1],"%Y%m%d"))
Weighting_Beta_Up = as.matrix(Weighting_Beta_Up)
Weighting_Beta_Down[,1] = as.numeric(format(Weighting_Beta_Down[,1],"%Y%m%d"))
Weighting_Beta_Down = as.matrix(Weighting_Beta_Down)


Shrinking_Beta_M[,1] = as.numeric(format(Shrinking_Beta_M[,1],"%Y%m%d"))
Shrinking_Beta_M = as.matrix(Shrinking_Beta_M)
Shrinking_Beta_DR[,1] = as.numeric(format(Shrinking_Beta_DR[,1],"%Y%m%d"))
Shrinking_Beta_DR = as.matrix(Shrinking_Beta_DR)
Shrinking_Beta_CF[,1] = as.numeric(format(Shrinking_Beta_CF[,1],"%Y%m%d"))
Shrinking_Beta_CF = as.matrix(Shrinking_Beta_CF)
Shrinking_Beta_Up[,1] = as.numeric(format(Shrinking_Beta_Up[,1],"%Y%m%d"))
Shrinking_Beta_Up = as.matrix(Shrinking_Beta_Up)
Shrinking_Beta_Down[,1] = as.numeric(format(Shrinking_Beta_Down[,1],"%Y%m%d"))
Shrinking_Beta_Down = as.matrix(Shrinking_Beta_Down)


Weighting_Shrinking_Beta_M[,1] = as.numeric(format(Weighting_Shrinking_Beta_M[,1],"%Y%m%d"))
Weighting_Shrinking_Beta_M = as.matrix(Weighting_Shrinking_Beta_M)
Weighting_Shrinking_Beta_DR[,1] = as.numeric(format(Weighting_Shrinking_Beta_DR[,1],"%Y%m%d"))
Weighting_Shrinking_Beta_DR = as.matrix(Weighting_Shrinking_Beta_DR)
Weighting_Shrinking_Beta_CF[,1] = as.numeric(format(Weighting_Shrinking_Beta_CF[,1],"%Y%m%d"))
Weighting_Shrinking_Beta_CF = as.matrix(Weighting_Shrinking_Beta_CF)
Weighting_Shrinking_Beta_Up[,1] = as.numeric(format(Weighting_Shrinking_Beta_Up[,1],"%Y%m%d"))
Weighting_Shrinking_Beta_Up = as.matrix(Weighting_Shrinking_Beta_Up)
Weighting_Shrinking_Beta_Down[,1] = as.numeric(format(Weighting_Shrinking_Beta_Down[,1],"%Y%m%d"))
Weighting_Shrinking_Beta_Down = as.matrix(Weighting_Shrinking_Beta_Down)


PERMNO = PERMNO_NAICS3[1,][-1]

library(data.table)
library(stringi)

setDT(HHI_3)
#Delete all HHI_3 with NA in the NAICS_3
HHI_3 = HHI_3[!is.na(HHI_3$NAICS_3),]
HHI_3 = HHI_3[!is.na(HHI_3$V1),]

HHI_3 = HHI_3[HHI_3$fyear>1971,]
#All NAICS Values per year, we have around 27 to 29 industries in each tercile:
#Low HHI Threshold:
HHI_3_Low  = HHI_3[,NAICS_3[V1<quantile(V1, probs = c(0.33),na.rm=T)],by="fyear"]
#High HHI Threshold:
HHI_3_High = HHI_3[,NAICS_3[V1>quantile(V1, probs = c(0.67),na.rm=T)],by="fyear"]

########################## So that is the Beta load in for Monthly:
#HHI_3 is only available after 1971:
PERMNO_NAICS3 = PERMNO_NAICS3[PERMNO_NAICS3[,1]>19720101,]
Beta_CF = Beta_CF[Beta_CF[,1]>19720101,]
Beta_M = Beta_M[Beta_M[,1]>19720101,]
Beta_DR = Beta_DR[Beta_DR[,1]>19720101,]
Beta_Up = Beta_Up[Beta_Up[,1]>19720101,]
Beta_Down = Beta_Down[Beta_Down[,1]>19720101,]
Weight_Beta_CF = Weighting_Beta_CF[Weighting_Beta_CF[,1]>19720101,]
Weight_Beta_M = Weighting_Beta_M[Weighting_Beta_M[,1]>19720101,]
Weight_Beta_DR = Weighting_Beta_DR[Weighting_Beta_DR[,1]>19720101,]
Weight_Beta_Up = Weighting_Beta_Up[Weighting_Beta_Up[,1]>19720101,]
Weight_Beta_Down = Weighting_Beta_Down[Weighting_Beta_Down[,1]>19720101,]
Shrinking_Beta_CF = Shrinking_Beta_CF[Shrinking_Beta_CF[,1]>19720101,]
Shrinking_Beta_M = Shrinking_Beta_M[Shrinking_Beta_M[,1]>19720101,]
Shrinking_Beta_DR = Shrinking_Beta_DR[Shrinking_Beta_DR[,1]>19720101,]
Shrinking_Beta_Up = Shrinking_Beta_Up[Shrinking_Beta_Up[,1]>19720101,]
Shrinking_Beta_Down = Shrinking_Beta_Down[Shrinking_Beta_Down[,1]>19720101,]
Weight_Shrinking_Beta_CF = Weighting_Shrinking_Beta_CF[Weighting_Shrinking_Beta_CF[,1]>19720101,]
Weight_Shrinking_Beta_M = Weighting_Shrinking_Beta_M[Weighting_Shrinking_Beta_M[,1]>19720101,]
Weight_Shrinking_Beta_DR = Weighting_Shrinking_Beta_DR[Weighting_Shrinking_Beta_DR[,1]>19720101,]
Weight_Shrinking_Beta_Up = Weighting_Shrinking_Beta_Up[Weighting_Shrinking_Beta_Up[,1]>19720101,]
Weight_Shrinking_Beta_Down = Weighting_Shrinking_Beta_Down[Weighting_Shrinking_Beta_Down[,1]>19720101,]
Weight_Beta_RME = Weighting_Beta_RME[Weighting_Beta_RME[,1]>19720101,]
Weight_Beta_SMB = Weighting_Beta_SMB[Weighting_Beta_SMB[,1]>19720101,]
Weight_Beta_HML = Weighting_Beta_HML[Weighting_Beta_HML[,1]>19720101,]
Weight_Beta_CMA = Weighting_Beta_CMA[Weighting_Beta_CMA[,1]>19720101,]
Weight_Beta_RMW = Weighting_Beta_RMW[Weighting_Beta_RMW[,1]>19720101,]
Weight_Beta_Mom = Weighting_Beta_Mom[Weighting_Beta_Mom[,1]>19720101,]
Weight_Beta_RME_Uni = Weighting_Beta_RME_Uni[Weighting_Beta_RME_Uni[,1]>19720101,]
Weight_Beta_SMB_Uni = Weighting_Beta_SMB_Uni[Weighting_Beta_SMB_Uni[,1]>19720101,]
Weight_Beta_HML_Uni = Weighting_Beta_HML_Uni[Weighting_Beta_HML_Uni[,1]>19720101,]
Weight_Beta_CMA_Uni = Weighting_Beta_CMA_Uni[Weighting_Beta_CMA_Uni[,1]>19720101,]
Weight_Beta_RMW_Uni = Weighting_Beta_RMW_Uni[Weighting_Beta_RMW_Uni[,1]>19720101,]
Weight_Beta_Mom_Uni = Weighting_Beta_Mom_Uni[Weighting_Beta_Mom_Uni[,1]>19720101,]
I_Vol = I_Vol[I_Vol[,1]>19720101,]


#Panel Regression; Build the Panel in the following way:
#time in yyyymm   Industry Code (NAICS)   HHI(annually), fill NA's    Beta_M    Beta_CF   Beta_DR   

#memory.limit(45000)
#Time, add this one for every PERMNO (which is every column in the matrix)
PANEL_NAICS = data.frame(Time = rep(PERMNO_NAICS3[2:dim(PERMNO_NAICS3)[1],1],dim(PERMNO_NAICS3)[2]-1), NAICS = c(PERMNO_NAICS3[2:dim(PERMNO_NAICS3)[1],-1]), PERMNO = rep(PERMNO,each=dim(PERMNO_NAICS3)[1]-1),
                         HHI = rep(NA,dim(PERMNO_NAICS3)[2]-1), Beta_M = c(Beta_M[,-1]), Beta_CF = c(Beta_CF[,-1]), Beta_DR = c(Beta_DR[,-1]), Beta_Down = c(Beta_Down[,-1]), 
                         Beta_Up = c(Beta_Up[,-1]),  Weight_Beta_M = c(Weight_Beta_M[,-1]), Weight_Beta_CF = c(Weight_Beta_CF[,-1]), Weight_Beta_DR = c(Weight_Beta_DR[,-1]), Weight_Beta_Down = c(Weight_Beta_Down[,-1]), 
                         Weight_Beta_Up = c(Weight_Beta_Up[,-1]),  Shrinking_Beta_M = c(Shrinking_Beta_M[,-1]), Shrinking_Beta_CF = c(Shrinking_Beta_CF[,-1]), Shrinking_Beta_DR = c(Shrinking_Beta_DR[,-1]), Shrinking_Beta_Down = c(Shrinking_Beta_Down[,-1]), 
                         Shrinking_Beta_Up = c(Shrinking_Beta_Up[,-1]),Weighting_Shrinking_Beta_M = c(Weight_Shrinking_Beta_M[,-1]), Weighting_Shrinking_Beta_Up = c(Weight_Shrinking_Beta_Up[,-1]),Weighting_Shrinking_Beta_Down = c(Weight_Shrinking_Beta_Down[,-1]),
                         Weighting_Shrinking_Beta_CF = c(Weight_Shrinking_Beta_CF[,-1]),Weighting_Shrinking_Beta_DR = c(Weight_Shrinking_Beta_DR[,-1]), I_Vol = c(I_Vol[2:dim(I_Vol)[1],-1]))

PANEL_NAICS = PANEL_NAICS[!is.na(PANEL_NAICS$NAICS),]
PANEL_NAICS = PANEL_NAICS[order(PANEL_NAICS$Time),]

HHI_3 = HHI_3[HHI_3$fyear<2019,]

options(warn=2)
for(i in 1:nrow(HHI_3)){
  Temp = PANEL_NAICS[HHI_3$NAICS_3[i] == PANEL_NAICS$NAICS,]
  Temp_HHI = HHI_3[HHI_3$NAICS_3[i] == HHI_3$NAICS_3,]
  Temp_HHI = Temp_HHI[order(Temp_HHI$fyear),]
  Expand_fyear = data.frame(fyear = rep(NA,length(unique(as.numeric(unlist(lapply(strsplit(as.character(Temp$Time),split=""),function(x) stri_paste(head(x,n=4),collapse=""))))))))
  Expand_fyear$fyear = unique(as.numeric(unlist(lapply(strsplit(as.character(Temp$Time),split=""),function(x) stri_paste(head(x,n=4),collapse="")))))
  
  Temp_HHI = merge(Temp_HHI,Expand_fyear,by="fyear",all.x=T,all.y=T)
  #Fill NA's from HHI!! By the Date:
  library(stringi)
  #How often does each year occur:
  Temp_Second = cbind(Temp_HHI$V1,table(as.numeric(unlist(lapply(strsplit(as.character(Temp$Time),split=""),function(x) stri_paste(head(x,n=4),collapse=""))))))
  Temp_Second = unname(rep(Temp_Second[,1],Temp_Second[,2]))
  
  Temp$HHI = Temp_Second
  
  PANEL_NAICS[HHI_3$NAICS_3[i] == PANEL_NAICS$NAICS,] = Temp
  print(paste("iteration",i,"of",nrow(HHI_3)))
}
options(warn=1)

save(PANEL_NAICS, file = "PANEL_NAICS.RData")
